Import and Filter TOC

Some preliminary setup

Show the code
library(LEEF.analysis)
library(LEEF.measurement.toc)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
Show the code
library(tidyr)
library(ggplot2)
library(ggExtra)
options(dplyr.summarise.inform = FALSE)
Show the code
db <- "/Volumes/LEEF/0.RRD/LEEF 1/LEEF.RRD.sqlite"
options(RRDdb = db)


unlink(c("~/Desktop/1.pre-processed.data/toc/", "~/Desktop/2.extracted.data/toc/"), recursive = TRUE)
pre_processor_toc("~/Desktop/0.raw.data/", "~/Desktop/1.pre-processed.data/")
[1] "Log file set to ~/Desktop/1.pre-processed.data//toc/toc.log"

########################################################

Processing toc
done

########################################################
Show the code
extractor_toc("~/Desktop/1.pre-processed.data/", "~/Desktop/2.extracted.data/")
Extracting toc
[1] "Log file set to ~/Desktop/2.extracted.data//o2meter/o2meter.log"

########################################################
Extracting toc
Processing LEEF_21_09_20 und 21_09_22  C.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_09_24 1-8.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_09_24 und 21_09_27 B.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_09_27 restB.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_09_29t.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_01.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_04 S22 - S30.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_04 und 21_10_06A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_06.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_08.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_11.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_13.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_15 s8 - 26.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_15.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_18.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_20.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_22.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_25.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_27.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_10_29 und 21_11_01.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_11_03 und 21_11_05.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_11_08 und 21_11_10.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_11_12.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_11_17 und 21_11_19.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_11_22 und 21_11_24.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_11_26 und 21_11_29.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_12_01 und 21_12_03A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_12_03B.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_12_06 und 21_12_08 1-8A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_12_08B.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_12_10 und 21_12_13.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_12_15 und 21_12_17.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_12_20 und 21_12_22.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_12_24.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_12_27 und 21_12_29.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_12_31 und 22_01_03.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_21_15_10 rest 2.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_01_05 und 22_01_07A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_01_10 und 22_01_12.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_01_14 und 22_01_17.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_01_19 und 22_01_21A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_01_19 und 22_01_21B.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_01_24 und 22_01_26.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_01_28 und 22_01_31A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_02_02 und 22_02_04.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_02_07B.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_02_07D.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_02_09 und 22_02_11A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_02_14 und 22_02_16A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_02_16B.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_02_18A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_02_18B.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_02_21A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_02_23A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_02_25.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_02_28.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_03_02.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_03_04.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_03_07.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_03_09.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_03_11.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_03_14.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_03_16.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_03_18.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_03_21.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_03_23.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_03_25.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_03_28.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_03_30A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_03_30B.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_01.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_04A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_04B.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_06.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_08.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_11.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_13.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_15A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_15B.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_18.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_20.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_22.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_25.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_27.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_04_29.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_05_02.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_05_04.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_05_06.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_05_09.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_05_11.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_05_13.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_05_16.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_05_18.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_05_20.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_05_23A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_05_25A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_05_27A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_05_30A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_06_01A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_06_03A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_06_06A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_06_08A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_06_10B.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_06_13A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
Processing LEEF_22_06_15A.txt ...
  |- Processing metadata ...
  |- Processing measurement parameter ...
  |- Processing actual data ...
  |- Saving files ...
done

########################################################
Show the code
add_to_and_overwrite_table_in_RRD(input = "~/Desktop/2.extracted.data/", dbname = db, overwrite = TRUE)
[1] 0
Show the code
toc <- LEEF.analysis::db_read_table(table = "toc__toc") %>%
  collect()

toc_original <- toc

# conn <- DBI::dbConnect(
#   drv = RSQLite::SQLite(),
#   db = db
# )
# 
# DBI::dbBegin(conn)
# DBI::dbWriteTable(
#   conn,
#   name = "toc__toc_original",
#   value = toc_original,
#   overwrite = TRUE,
#   append = FALSE
# )
# DBI::dbCommit(conn)
# 
# DBI::dbDisconnect(conn)

toc$id <- 1:nrow(toc)

Remove missing inj_type

We have the following measurements files with measurements without inj_type or `NULL.

Show the code
toc %>%
  filter(inj_type == "") %>%
  group_by(filename) %>%
  summarise(n = n()) %>% 
  collect() %>%
  knitr::kable()
filename n
LEEF_21_09_24 1-8 208
LEEF_22_01_19 und 22_01_21A 196
LEEF_22_02_09 und 22_02_11A 3
LEEF_22_02_14 und 22_02_16A 3
LEEF_22_02_16B 3
LEEF_22_04_04A 54
LEEF_22_04_15A 58
LEEF_22_06_03A 6
LEEF_22_06_06A 6
LEEF_22_06_08A 6
LEEF_22_06_10B 6
LEEF_22_06_13A 6
LEEF_22_06_15A 6

These can be filtered out

Show the code
before <- nrow(toc)
toc <- toc %>%
  filter(inj_type != "")
after <- nrow(toc)
cat("Before : ", before, "\n")
Before :  16564 
Show the code
cat("Removed: ", before - after, "\n")
Removed:  561 
Show the code
cat("After  : ", after, "\n")
After  :  16003 

Rows where conc == 0

Here it gets tricky. There are cases, where either conc_1 == 0 or conc_2 == 0, but conc_3 != 0, which results in a valid measurement, i.e. conc != 0. I would suggest to remove all rows, in which conc == 0

Show the code
toc %>%
  filter(conc == 0) %>%
  group_by(filename) %>%
  summarise(n = n()) %>% 
  collect() %>%
  knitr::kable()
filename n
LEEF_21_09_20 und 21_09_22 C 4
LEEF_21_09_24 1-8 12
LEEF_21_09_24 und 21_09_27 B 39
LEEF_21_09_27 restB 8
LEEF_21_09_29t 8
LEEF_21_10_01 8
LEEF_21_10_04 S22 - S30 6
LEEF_21_10_04 und 21_10_06A 160
LEEF_21_10_06 8
LEEF_21_10_08 8
LEEF_21_10_11 8
LEEF_21_10_13 4
LEEF_21_10_15 4
LEEF_21_10_15 s8 - 26 22
LEEF_21_10_18 4
LEEF_21_10_20 4
LEEF_21_10_22 4
LEEF_21_10_25 4
LEEF_21_10_27 4
LEEF_21_10_29 und 21_11_01 6
LEEF_21_11_03 und 21_11_05 6
LEEF_21_11_08 und 21_11_10 6
LEEF_21_11_12 94
LEEF_21_11_17 und 21_11_19 6
LEEF_21_11_22 und 21_11_24 6
LEEF_21_11_26 und 21_11_29 7
LEEF_21_12_01 und 21_12_03A 113
LEEF_21_12_03B 4
LEEF_21_12_06 und 21_12_08 1-8A 85
LEEF_21_12_08B 1
LEEF_21_12_10 und 21_12_13 6
LEEF_21_12_15 und 21_12_17 5
LEEF_21_12_20 und 21_12_22 6
LEEF_21_12_24 4
LEEF_21_12_27 und 21_12_29 4
LEEF_21_12_31 und 22_01_03 5
LEEF_21_15_10 rest 2 2
LEEF_22_01_05 und 22_01_07A 6
LEEF_22_01_10 und 22_01_12 6
LEEF_22_01_14 und 22_01_17 6
LEEF_22_01_19 und 22_01_21A 15
LEEF_22_01_19 und 22_01_21B 6
LEEF_22_01_24 und 22_01_26 6
LEEF_22_01_28 und 22_01_31A 4
LEEF_22_02_02 und 22_02_04 4
LEEF_22_02_07B 4
LEEF_22_02_07D 2
LEEF_22_02_09 und 22_02_11A 2
LEEF_22_02_14 und 22_02_16A 76
LEEF_22_02_18A 4
LEEF_22_02_18B 3
LEEF_22_02_21A 3
LEEF_22_02_23A 6
LEEF_22_02_25 3
LEEF_22_02_28 6
LEEF_22_03_02 4
LEEF_22_03_04 1
LEEF_22_03_07 3
LEEF_22_03_09 2
LEEF_22_03_11 2
LEEF_22_03_14 5
LEEF_22_03_16 5
LEEF_22_03_18 3
LEEF_22_03_21 4
LEEF_22_03_23 5
LEEF_22_03_25 4
LEEF_22_03_28 3
LEEF_22_03_30A 3
LEEF_22_03_30B 2
LEEF_22_04_01 4
LEEF_22_04_04A 61
LEEF_22_04_04B 2
LEEF_22_04_06 5
LEEF_22_04_08 5
LEEF_22_04_11 4
LEEF_22_04_13 4
LEEF_22_04_15A 5
LEEF_22_04_15B 2
LEEF_22_04_18 4
LEEF_22_04_20 5
LEEF_22_04_22 6
LEEF_22_04_25 5
LEEF_22_04_27 5
LEEF_22_04_29 5
LEEF_22_05_02 5
LEEF_22_05_04 3
LEEF_22_05_06 5
LEEF_22_05_09 5
LEEF_22_05_11 5
LEEF_22_05_13 7
LEEF_22_05_16 4
LEEF_22_05_18 7
LEEF_22_05_20 5
LEEF_22_05_23A 7
LEEF_22_05_25A 7
LEEF_22_05_27A 5
LEEF_22_05_30A 5
LEEF_22_06_01A 5
LEEF_22_06_03A 3
LEEF_22_06_06A 2
LEEF_22_06_08A 2
LEEF_22_06_10B 3
LEEF_22_06_13A 2
LEEF_22_06_15A 5
Show the code
before <- nrow(toc)
toc <- toc %>%
  filter(conc > 0)
after <- nrow(toc)
cat("Before : ", before, "\n")
Before :  16003 
Show the code
cat("Removed: ", before - after, "\n")
Removed:  1205 
Show the code
cat("After  : ", after, "\n")
After  :  14798 

Check timestamp / id / Filename

In each file, there should be a maximum of 2 timestamps, except of files which contain two timestamps.

Show the code
toc %>%
  select(filename, timestamp, identification, inj_type) %>%
  filter(identification != "H2O" & identification != "2.5mg/lIC") %>%
  group_by(filename, timestamp) %>%
  summarise(timestamps_count = n()) %>%
  group_by(filename) %>%
  summarise(number_of_timestamps = n()) %>%
  arrange(desc(number_of_timestamps)) %>%
  knitr::kable()
filename number_of_timestamps
LEEF_21_09_20 und 21_09_22 C 2
LEEF_21_09_24 und 21_09_27 B 2
LEEF_21_10_04 und 21_10_06A 2
LEEF_21_10_29 und 21_11_01 2
LEEF_21_11_03 und 21_11_05 2
LEEF_21_11_08 und 21_11_10 2
LEEF_21_11_12 2
LEEF_21_11_17 und 21_11_19 2
LEEF_21_11_22 und 21_11_24 2
LEEF_21_11_26 und 21_11_29 2
LEEF_21_12_01 und 21_12_03A 2
LEEF_21_12_06 und 21_12_08 1-8A 2
LEEF_21_12_10 und 21_12_13 2
LEEF_21_12_15 und 21_12_17 2
LEEF_21_12_20 und 21_12_22 2
LEEF_21_12_27 und 21_12_29 2
LEEF_21_12_31 und 22_01_03 2
LEEF_22_01_05 und 22_01_07A 2
LEEF_22_01_10 und 22_01_12 2
LEEF_22_01_14 und 22_01_17 2
LEEF_22_01_19 und 22_01_21B 2
LEEF_22_01_24 und 22_01_26 2
LEEF_22_01_28 und 22_01_31A 2
LEEF_22_02_02 und 22_02_04 2
LEEF_22_02_09 und 22_02_11A 2
LEEF_22_02_14 und 22_02_16A 2
LEEF_21_09_24 1-8 1
LEEF_21_09_27 restB 1
LEEF_21_09_29t 1
LEEF_21_10_01 1
LEEF_21_10_04 S22 - S30 1
LEEF_21_10_06 1
LEEF_21_10_08 1
LEEF_21_10_11 1
LEEF_21_10_13 1
LEEF_21_10_15 1
LEEF_21_10_15 s8 - 26 1
LEEF_21_10_18 1
LEEF_21_10_20 1
LEEF_21_10_22 1
LEEF_21_10_25 1
LEEF_21_10_27 1
LEEF_21_12_03B 1
LEEF_21_12_08B 1
LEEF_21_12_24 1
LEEF_21_15_10 rest 2 1
LEEF_22_01_19 und 22_01_21A 1
LEEF_22_02_07B 1
LEEF_22_02_07D 1
LEEF_22_02_16B 1
LEEF_22_02_18A 1
LEEF_22_02_18B 1
LEEF_22_02_21A 1
LEEF_22_02_23A 1
LEEF_22_02_25 1
LEEF_22_02_28 1
LEEF_22_03_02 1
LEEF_22_03_04 1
LEEF_22_03_07 1
LEEF_22_03_09 1
LEEF_22_03_11 1
LEEF_22_03_14 1
LEEF_22_03_16 1
LEEF_22_03_18 1
LEEF_22_03_21 1
LEEF_22_03_23 1
LEEF_22_03_25 1
LEEF_22_03_28 1
LEEF_22_03_30A 1
LEEF_22_03_30B 1
LEEF_22_04_01 1
LEEF_22_04_04A 1
LEEF_22_04_04B 1
LEEF_22_04_06 1
LEEF_22_04_08 1
LEEF_22_04_11 1
LEEF_22_04_13 1
LEEF_22_04_15A 1
LEEF_22_04_15B 1
LEEF_22_04_18 1
LEEF_22_04_20 1
LEEF_22_04_22 1
LEEF_22_04_25 1
LEEF_22_04_27 1
LEEF_22_04_29 1
LEEF_22_05_02 1
LEEF_22_05_04 1
LEEF_22_05_06 1
LEEF_22_05_09 1
LEEF_22_05_11 1
LEEF_22_05_13 1
LEEF_22_05_16 1
LEEF_22_05_18 1
LEEF_22_05_20 1
LEEF_22_05_23A 1
LEEF_22_05_25A 1
LEEF_22_05_27A 1
LEEF_22_05_30A 1
LEEF_22_06_01A 1
LEEF_22_06_03A 1
LEEF_22_06_06A 1
LEEF_22_06_08A 1
LEEF_22_06_10B 1
LEEF_22_06_13A 1
LEEF_22_06_15A 1

Fixed two typos in the raw data. Now, it looks OK for me.

  • LEEF_21_11_12: “Analysis Name”,“LEEF_21_11_12 und 21_15_10A.adb”
  • LEEF_22_01_19 und 22_01_21A: All analysis for 22_01_21 are not valid

Identify TOC measurements which are based on missing IC or TC measurements

We have to identify and remove the TOC measurements which are using wrong measurements, i.e. TC or IC not available.

The following rows need to be removed:

Show the code
inj_type <- list(
  tc = toc %>%
    filter(inj_type == "TC" & bottle != "") %>%
    select(filename, timestamp, bottle, conc_tc = conc, id_tc = id),
  ic = toc %>%
    filter(inj_type == "IC" & bottle != "") %>%
    select(filename, timestamp, bottle, conc_ic = conc, id_ic = id),
  toc = toc%>%
    filter(inj_type == "TOC" & bottle != "") %>%
    select(filename, timestamp, bottle, conc_toc = conc, id_toc = id),
  tn = toc%>%
    filter(inj_type == "TN" & bottle != "") %>%
    select(filename, timestamp, bottle, conc_tn = conc, id_tn = id)
)
x <- merge(inj_type$tc, inj_type$ic, by = c("filename", "timestamp", "bottle"), all = TRUE)
x <- merge(x, inj_type$toc, by = c("filename", "timestamp", "bottle"), all = TRUE)
x <- merge(x, inj_type$tn, by = c("filename", "timestamp", "bottle"), all = TRUE)
x$TOC <- x$conc_tc - x$conc_ic

i <- x[is.na(x$TOC),"id_toc"]
toc[toc$id %in% i,]
# A tibble: 16 × 13
   filename    anays…¹ times…² bottle posit…³ ident…⁴ inj_t…⁵  conc    cv conc_1
   <chr>       <chr>     <int> <chr>    <int> <chr>   <chr>   <dbl> <dbl>  <dbl>
 1 LEEF_21_09… 2021-1…  2.02e7 b_19        51 27.09.… TOC     48.6     NA     NA
 2 LEEF_21_09… 2021-1…  2.02e7 b_24        56 27.09.… TOC     49.7     NA     NA
 3 LEEF_21_10… 2021-1…  2.02e7 b_23        25 04.10.… TOC      0.13    NA     NA
 4 LEEF_21_10… 2021-1…  2.02e7 b_26        58 06.10.… TOC      0.19    NA     NA
 5 LEEF_21_10… 2021-1…  2.02e7 b_27        59 15.11.… TOC     58.9     NA     NA
 6 LEEF_21_11… 2021-1…  2.02e7 b_28        60 15.11.… TOC      0.82    NA     NA
 7 LEEF_21_11… 2021-1…  2.02e7 b_07        39 29.11.… TOC     33.9     NA     NA
 8 LEEF_21_12… 2021-1…  2.02e7 b_20        52 03.12.… TOC      0.92    NA     NA
 9 LEEF_21_12… 2021-1…  2.02e7 b_10        42 08.12.… TOC     28.9     NA     NA
10 LEEF_21_12… 2021-1…  2.02e7 b_25        57 08.12.… TOC      0.81    NA     NA
11 LEEF_22_01… 2022-0…  2.02e7 b_10        11 19.01.… TOC     51.1     NA     NA
12 LEEF_22_02… 2022-0…  2.02e7 b_13        45 16.02.… TOC     52.9     NA     NA
13 LEEF_22_02… 2022-0…  2.02e7 b_20        21 18.02.… TOC     46.0     NA     NA
14 LEEF_22_03… 2022-0…  2.02e7 b_03         5 30.03.… TOC     35.8     NA     NA
15 LEEF_22_04… 2022-0…  2.02e7 b_04         6 04.04.… TOC     32.2     NA     NA
16 LEEF_22_04… 2022-0…  2.02e7 b_17        19 15.04.… TOC     38.1     NA     NA
# … with 3 more variables: conc_2 <dbl>, conc_3 <dbl>, id <int>, and
#   abbreviated variable names ¹​anaysis_time, ²​timestamp, ³​position,
#   ⁴​identification, ⁵​inj_type

These can be filtered out

Show the code
before <- nrow(toc)
toc <- toc %>%
  filter(!(id %in% i))
after <- nrow(toc)
cat("Before : ", before, "\n")
Before :  14798 
Show the code
cat("Removed: ", before - after, "\n")
Removed:  16 
Show the code
cat("After  : ", after, "\n")
After  :  14782 

Filter out unrealistic small values

Show the code
toc %>% ggplot(aes(x=conc)) + 
  stat_density(bw = 1) +
  facet_grid(rows = vars(inj_type), scales = "free_y")

One can see for TC two peaks clearly separated. The values smaller than 5 will be discarded as they are unrealistically low and can be traced to measuring errors in the machine.

Show the code
toc %>% 
  filter(!(inj_type == "TC" & conc < 5)) %>% 
  ggplot(aes(x=conc)) + 
  stat_density(bw = 1) +
  facet_grid(rows = vars(inj_type), scales = "free_y")

These can be filtered out

Show the code
before <- nrow(toc)
toc <- toc %>%
  filter(!(inj_type == "TC" & conc < 5))
after <- nrow(toc)
cat("Before : ", before, "\n")
Before :  14782 
Show the code
cat("Removed: ", before - after, "\n")
Removed:  315 
Show the code
cat("After  : ", after, "\n")
After  :  14467 

As TOC is calculated as TC - IC = TOC we now have to re-calculate the TOC values

Show the code
TOC_calc <- toc %>% 
  mutate(an_id = paste(filename, position)) %>%
  pivot_wider(id_cols = an_id, names_from = inj_type, values_from = conc) %>%
  mutate(TOC_calc = TC - IC) %>%
  select(an_id, TOC_calc) %>%
  mutate(an_id = paste(an_id, "TOC"))

toc <- toc %>% 
  mutate(an_id = paste(filename, position, inj_type)) %>%
  left_join(TOC_calc, by = "an_id") %>%
  mutate( conc = ifelse(inj_type == "TOC", TOC_calc, conc) ) %>%
  select( -TOC_calc, -an_id)

And the plot again after re-calculation of the TOC values

Show the code
toc %>% 
  filter(!(inj_type == "TC" & conc < 5)) %>% 
  ggplot(aes(x=conc)) + 
  stat_density(bw = 1, na.rm = TRUE) +
  facet_grid(rows = vars(inj_type), scales = "free_y")

Plot before filtering

Now let’s look at the plots of the measurements per bottle per timestep

Show the code
plot_tocs_per_bottle_per_timestamp(db = db, c("TC", "TOC"))
Warning: Missing values are always removed in SQL aggregation functions.
Use `na.rm = TRUE` to silence this warning
This warning is displayed once every 8 hours.

Show the code
plot_tocs_per_bottle_per_timestamp(db = db, c("IC"))

Show the code
plot_tocs_per_bottle_per_timestamp(db = db, c("TN"))

Apply the filters to the database

Show the code
conn <- DBI::dbConnect(
  drv = RSQLite::SQLite(),
  db = db
)

DBI::dbBegin(conn)
DBI::dbWriteTable(
  conn,
  name = "toc__toc",
  value = toc,
  overwrite = TRUE,
  append = FALSE
)
DBI::dbCommit(conn)

DBI::dbDisconnect(conn)

Plot after filtering

Now let’s look at the plots of the measurements per bottle per timestep

Show the code
plot_tocs_per_bottle_per_timestamp(db = db, c("TC", "TOC"))

Show the code
plot_tocs_per_bottle_per_timestamp(db = db, c("IC"))

Show the code
plot_tocs_per_bottle_per_timestamp(db = db, c("TN"))

Distribution of the individual values per inj_type

Calculate the densities

Show the code
conc <- list(
  tc = list(
    original = toc_original$conc[toc$inj_type == "TC" | !is.null(toc$bottle)] |> density(bw = 0.1, na.rm = TRUE),
    removed  = toc$conc[toc$inj_type == "TC" | !is.null(toc$bottle)] |> density(bw = 0.1, na.rm = TRUE)
  ),
  toc = list(
    original = toc_original$conc[toc$inj_type == "TOC" | !is.null(toc$bottle)] |> density(bw = 0.1, na.rm = TRUE),
    removed  = toc$conc[toc$inj_type == "TOC" | !is.null(toc$bottle)] |> density(bw = 0.1, na.rm = TRUE)
  ),
  ic = list(
    original = toc_original$conc[toc$inj_type == "IC" | !is.null(toc$bottle)] |> density(bw = 0.1, na.rm = TRUE),
    removed  = toc$conc[toc$inj_type == "IC" | !is.null(toc$bottle)] |> density(bw = 0.1, na.rm = TRUE)
  ),  
  tn = list(
    original = toc_original$conc[toc$inj_type == "TN" | !is.null(toc$bottle)] |> density(bw = 0.1, na.rm = TRUE),
    removed  = toc$conc[toc$inj_type == "TN" | !is.null(toc$bottle)] |> density(bw = 0.1, na.rm = TRUE)
  )
)

TC

Show the code
plot(conc$tc$removed, col = "red")
lines(conc$tc$original)

TOC

Show the code
plot(conc$toc$removed, col = "red")
lines(conc$toc$original)

IC

Show the code
plot(conc$tc$removed, col = "red")
lines(conc$tc$original)

TN

Show the code
plot(conc$tn$removed, col = "red")
lines(conc$tn$original)

Number of measurements per timestamp, bottle and inj_type

We have to look if there are any duplicate measurements.

Show the code
dupl <- toc %>%
  group_by(timestamp, bottle, inj_type) %>%
  filter(!is.na(bottle)) %>%
  summarise(fn_1 = min(filename), fn_2 = max(filename), count = n(), min_conc = min(conc), max_conc = max(conc)) %>%
  filter(count > 1 & count < 10) %>%  
  mutate(diff = max_conc - min_conc) %>%
  arrange(inj_type, diff)
dupl %>% 
  group_by(fn_1, fn_2, timestamp) %>%
  summarise() %>%
  arrange(timestamp) %>%
  knitr::kable()
fn_1 fn_2 timestamp
LEEF_21_09_24 1-8 LEEF_21_09_24 und 21_09_27 B 20210924
LEEF_21_09_24 und 21_09_27 B LEEF_21_09_27 restB 20210927
LEEF_21_10_04 S22 - S30 LEEF_21_10_04 und 21_10_06A 20211004
LEEF_21_10_01 LEEF_21_10_29 und 21_11_01 20211101
LEEF_21_10_15 s8 - 26 LEEF_21_11_12 20211115
LEEF_21_10_15 s8 - 26 LEEF_21_15_10 rest 2 20211115
LEEF_21_12_01 und 21_12_03A LEEF_21_12_03B 20211203
LEEF_21_12_06 und 21_12_08 1-8A LEEF_21_12_08B 20211208
LEEF_22_01_19 und 22_01_21A LEEF_22_01_19 und 22_01_21B 20220119
LEEF_22_02_07B LEEF_22_02_07D 20220207
LEEF_22_02_14 und 22_02_16A LEEF_22_02_16B 20220216
LEEF_22_02_18A LEEF_22_02_18B 20220218
LEEF_22_03_30A LEEF_22_03_30B 20220330
LEEF_22_04_04A LEEF_22_04_04B 20220404

And some plots

Show the code
dat <- db_read_toc(db = db, duplicates = NULL) %>% 
  collect()
dat$id <- 1:nrow(dat)
ids <- dat %>% 
  filter(!is.na(bottle)) %>%
  group_by(timestamp, bottle, type) %>%
  summarize(min(id), max(id), n = n()) %>%
  filter(n > 1)
ids <- c(ids$`min(id)`, ids$`max(id)`) |>
  unique()
Show the code
pl <- dat %>% 
  filter(id %in% ids) %>%
  ggplot(aes(x=concentration)) + 
  stat_density(bw = 1) +
  facet_grid(rows = vars(type), scales = "free_y")
pl

Show the code
x <- dat %>% 
  filter(id %in% ids) %>%
  group_by(day, timestamp, bottle, type) %>%
  summarise(timestamp, bottle, type, mic = min(concentration), mac = max(concentration)) %>%
  mutate(mdiff = mac - mic)

 
pl <- x %>%
  filter(type == "TC") %>%
  ggplot2::ggplot(ggplot2::aes(x = mic, y = mac, colour = type)) +
  ggplot2::geom_point() +
  ggplot2::xlab("Smaler concentration Value") +
  ggplot2::ylab("Larger concentration Value")
  
ggMarginal(pl, type="histogram")

Show the code
pl <- x %>%
  filter(type == "IC") %>%
  ggplot2::ggplot(ggplot2::aes(x = mic, y = mac, colour = type)) +
  ggplot2::geom_point() +
  ggplot2::xlab("Smaler concentration Value") +
  ggplot2::ylab("Larger concentration Value")
  
ggMarginal(pl, type="histogram")

Show the code
pl <- x %>%
  filter(type == "TOC") %>%
  ggplot2::ggplot(ggplot2::aes(x = mic, y = mac, colour = type)) +
  ggplot2::geom_point() +
  ggplot2::xlab("Smaler concentration Value") +
  ggplot2::ylab("Larger concentration Value")
  
ggMarginal(pl, type="histogram")

Show the code
pl <- x %>%
  filter(type == "IC") %>%
  ggplot2::ggplot(ggplot2::aes(x = mic, y = mac, colour = type)) +
  ggplot2::geom_point() +
  ggplot2::xlab("Smaler concentration Value") +
  ggplot2::ylab("Larger concentration Value")
  
ggMarginal(pl, type="histogram")